#include <iostream>
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>

static cv::Mat getOpenCVGaussianKernel2D(int ksize=3, double sigma=0) {
    if (sigma == 0) {
        sigma = 0.3 * ((ksize - 1) * 0.5 - 1) + 0.8;
    }
    cv::Mat kernelX = cv::getGaussianKernel(ksize, sigma, CV_64F);
    cv::Mat kernelY = cv::getGaussianKernel(ksize, sigma, CV_64F);
    cv::Mat kernel = kernelX * kernelY.t();
    return kernel;
}

static cv::Mat getManualGaussianKernel2D(int ksize=3, double sigma=0) {
    if (sigma == 0) {
        sigma = 0.3 * ((ksize - 1) * 0.5 - 1) + 0.8;
    }
    cv::Mat kernel(ksize, ksize, CV_64FC1, cv::Scalar::all(0));
    double denom = 2. * sigma * sigma;
    int center = ksize / 2;
    for (int i = 0; i < ksize; i++) {
        double* p = kernel.ptr<double>(i);
        for (int j = 0; j < ksize; j++) {
            double x = j - center;
            double y = i - center;
            p[j] = cv::exp(-(x * x + y * y) / denom);
        }
    }
    cv::Scalar sum;
    sum = cv::sum(kernel);
    kernel = kernel / sum;
    return kernel;
}

static cv::Mat applyGaussianKernel2D(const cv::Mat& mat, const cv::Mat& kernel) {
    CV_Assert(kernel.rows == 3 && kernel.cols == 3 && kernel.channels() == 1 && kernel.depth() == CV_64F);
    CV_Assert(mat.channels() == 1 && mat.depth() == CV_8U);

    int rows = mat.rows;
    int cols = mat.cols;
    int type = mat.type();
    cv::Mat mat_;
    cv::copyMakeBorder(mat, mat_, 1, 1, 1, 1, cv::BORDER_REFLECT_101);

    cv::Mat result;
    result.create(rows, cols, type);
    const cv::Mat_<double>& kernel_ = kernel;
    for (int i = 1; i < rows + 1; i++) {
        const uchar* pPreRow = mat_.ptr<uchar>(i - 1);
        const uchar* pCurRow = mat_.ptr<uchar>(i);
        const uchar* pNxtRow = mat_.ptr<uchar>(i + 1);
        uchar* p = result.ptr<uchar>(i - 1);
        for (int j = 1; j < cols + 1; j++) {
            p[j-1] = cv::saturate_cast<uchar>(kernel_(0, 0) * pPreRow[j-1] + kernel_(0, 1) * pPreRow[j] + kernel_(0, 2) * pPreRow[j+1] + 
                                              kernel_(1, 0) * pCurRow[j-1] + kernel_(1, 1) * pCurRow[j] + kernel_(1, 2) * pCurRow[j+1] +
                                              kernel_(2, 0) * pNxtRow[j-1] + kernel_(2, 1) * pNxtRow[j] + kernel_(2, 2) * pNxtRow[j+1]);
        }
    }
    return result;
}

int main() {
    cv::Mat mat;
    mat.create(16, 16, CV_8UC1);
    cv::randu(mat, cv::Scalar::all(0), cv::Scalar::all(255));
    std::cout << "Original Mat = \n" << cv::format(mat, cv::Formatter::FMT_PYTHON) << std::endl << std::endl;

    int ksize = 3;
    double sigma = 0;
    cv::Mat kernel1 = getOpenCVGaussianKernel2D(ksize, sigma);
    std::cout << "2D Gaussian Kernel using getGaussianKernel() =\n" << cv::format(kernel1, cv::Formatter::FMT_PYTHON) << std::endl << std::endl;
    cv::Mat kernel2 = getManualGaussianKernel2D(ksize, sigma);
    std::cout << "2D Gaussian Kernel by manual calculation =\n" << cv::format(kernel2, cv::Formatter::FMT_PYTHON) << std::endl << std::endl;
    cv::Mat kernelDiff;
    kernelDiff = kernel1 - kernel2;
    std::cout << "Kernel difference by two methods = \n" << cv::format(kernelDiff, cv::Formatter::FMT_PYTHON) << std::endl << std::endl;

    cv::Mat matFilter2D;
    filter2D(mat, matFilter2D, mat.depth(), kernel1, cv::Point(-1, -1), 0, cv::BORDER_REFLECT_101);
    std::cout << "Result by filter2D() = \n" << cv::format(matFilter2D, cv::Formatter::FMT_PYTHON) << std::endl << std::endl;

    cv::Mat matManualMask;
    matManualMask = applyGaussianKernel2D(mat, kernel2);
    std::cout << "Result by manual mask = \n" << cv::format(matManualMask, cv::Formatter::FMT_PYTHON) << std::endl << std::endl;

    cv::Mat matDiff;
    matDiff = matManualMask - matFilter2D;
    std::cout << "Difference between two methods = \n" << cv::format(matDiff, cv::Formatter::FMT_PYTHON) << std::endl << std::endl;

    return EXIT_SUCCESS;
}
